return 1;
}
-void domain_relinquish_memory(struct domain *d)
+/*
+ * Renamed from domain_relinquish_memory: teardown now releases more than
+ * guest memory (see the x86 counterpart).  ia64 stub — no-op for now.
+ */
+void domain_relinquish_resources(struct domain *d)
{
dummy();
}
struct pt_regs *get_execution_context(void) { return ia64_task_regs(current); }
-void cleanup_writable_pagetable(struct domain *d, int what) { return; }
-
void raise_actimer_softirq(void)
{
raise_softirq(AC_TIMER_SOFTIRQ);
mk_l3_pgentry(__pa(d->arch.mm_perdomain_l2) | __PAGE_HYPERVISOR);
#endif
+ (void)ptwr_init(d);
+
shadow_lock_init(d);
}
}
return op;
}
-static void relinquish_list(struct domain *d, struct list_head *list)
+#ifdef CONFIG_VMX
+/*
+ * Free the VMX-specific per-vcpu resources: the VMCS, the monitor
+ * pagetable, and the virtual-PIT ac_timer.  No-op for non-VMX vcpus.
+ * Called from domain_relinquish_resources() for every exec_domain.
+ */
+static void vmx_relinquish_resources(struct exec_domain *ed)
+{
+ if ( !VMX_DOMAIN(ed) )
+ return;
+
+ BUG_ON(ed->arch.arch_vmx.vmcs == NULL);
+ free_vmcs(ed->arch.arch_vmx.vmcs);
+ ed->arch.arch_vmx.vmcs = 0;
+
+ free_monitor_pagetable(ed);
+ rem_ac_timer(&ed->arch.arch_vmx.vmx_platform.vmx_pit.pit_timer);
+}
+#else
+/* !CONFIG_VMX: compile to nothing so callers need no #ifdef. */
+#define vmx_relinquish_resources(_ed) ((void)0)
+#endif
+
+static void relinquish_memory(struct domain *d, struct list_head *list)
{
struct list_head *ent;
struct pfn_info *page;
spin_unlock_recursive(&d->page_alloc_lock);
}
-#ifdef CONFIG_VMX
-static void vmx_domain_relinquish_memory(struct exec_domain *ed)
-{
- struct vmx_virpit_t *vpit = &(ed->arch.arch_vmx.vmx_platform.vmx_pit);
- /*
- * Free VMCS
- */
- ASSERT(ed->arch.arch_vmx.vmcs);
- free_vmcs(ed->arch.arch_vmx.vmcs);
- ed->arch.arch_vmx.vmcs = 0;
-
- free_monitor_pagetable(ed);
- rem_ac_timer(&(vpit->pit_timer));
-}
-#endif
-
-void domain_relinquish_memory(struct domain *d)
+void domain_relinquish_resources(struct domain *d)
{
struct exec_domain *ed;
BUG_ON(d->cpuset != 0);
+ ptwr_destroy(d);
+
/* Release device mappings of other domains */
- gnttab_release_dev_mappings( d->grant_table );
+ gnttab_release_dev_mappings(d->grant_table);
/* Exit shadow mode before deconstructing final guest page table. */
shadow_mode_disable(d);
pagetable_val(ed->arch.guest_table_user) >> PAGE_SHIFT]);
ed->arch.guest_table_user = mk_pagetable(0);
}
- }
-#ifdef CONFIG_VMX
- if ( VMX_DOMAIN(d->exec_domain[0]) )
- for_each_exec_domain ( d, ed )
- vmx_domain_relinquish_memory(ed);
-#endif
+ vmx_relinquish_resources(ed);
+ }
/*
* Relinquish GDT mappings. No need for explicit unmapping of the LDT as
destroy_gdt(ed);
/* Relinquish every page of memory. */
- relinquish_list(d, &d->xenpage_list);
- relinquish_list(d, &d->page_list);
+ relinquish_memory(d, &d->xenpage_list);
+ relinquish_memory(d, &d->page_list);
}
* Writable Pagetables
*/
-ptwr_info_t ptwr_info[NR_CPUS];
-
#ifdef VERBOSE
int ptwr_debug = 0x0;
#define PTWR_PRINTK(_f, _a...) \
#endif
/* Flush the given writable p.t. page and write-protect it again. */
-void ptwr_flush(const int which)
+void ptwr_flush(struct domain *d, const int which)
{
unsigned long sstat, spte, pte, *ptep, l1va;
l1_pgentry_t *sl1e = NULL, *pl1e, ol1e, nl1e;
l2_pgentry_t *pl2e;
- int i, cpu = smp_processor_id();
- struct exec_domain *ed = current;
- struct domain *d = ed->domain;
+ int i;
unsigned int modified = 0;
- l1va = ptwr_info[cpu].ptinfo[which].l1va;
+ l1va = d->arch.ptwr[which].l1va;
ptep = (unsigned long *)&linear_pg_table[l1_linear_offset(l1va)];
/*
* STEP 2. Validate any modified PTEs.
*/
- pl1e = ptwr_info[cpu].ptinfo[which].pl1e;
+ pl1e = d->arch.ptwr[which].pl1e;
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
- ol1e = ptwr_info[cpu].ptinfo[which].page[i];
+ ol1e = d->arch.ptwr[which].page[i];
nl1e = pl1e[i];
if ( likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)) )
* Make the remaining p.t's consistent before crashing, so the
* reference counts are correct.
*/
- memcpy(&pl1e[i], &ptwr_info[cpu].ptinfo[which].page[i],
+ memcpy(&pl1e[i], &d->arch.ptwr[which].page[i],
(L1_PAGETABLE_ENTRIES - i) * sizeof(l1_pgentry_t));
domain_crash();
break;
unmap_domain_mem(pl1e);
perfc_incr_histo(wpt_updates, modified, PT_UPDATES);
- ptwr_info[cpu].ptinfo[which].prev_exec_domain = ed;
- ptwr_info[cpu].ptinfo[which].prev_nr_updates = modified;
+ d->arch.ptwr[which].prev_nr_updates = modified;
/*
* STEP 3. Reattach the L1 p.t. page into the current address space.
if ( (which == PTWR_PT_ACTIVE) && likely(!shadow_mode_enabled(d)) )
{
- pl2e = &linear_l2_table[ptwr_info[cpu].ptinfo[which].l2_idx];
+ pl2e = &linear_l2_table[d->arch.ptwr[which].l2_idx];
*pl2e = mk_l2_pgentry(l2_pgentry_val(*pl2e) | _PAGE_PRESENT);
}
* STEP 4. Final tidy-up.
*/
- ptwr_info[cpu].ptinfo[which].l1va = 0;
+ d->arch.ptwr[which].l1va = 0;
if ( unlikely(sl1e != NULL) )
{
};
/* Write page fault handler: check if guest is trying to modify a PTE. */
-int ptwr_do_page_fault(unsigned long addr)
+int ptwr_do_page_fault(struct domain *d, unsigned long addr)
{
- unsigned long pte, pfn, l2e;
- struct pfn_info *page;
- l2_pgentry_t *pl2e;
- int which, cpu = smp_processor_id();
- u32 l2_idx;
- struct exec_domain *ed = current;
+ unsigned long pte, pfn, l2e;
+ struct pfn_info *page;
+ l2_pgentry_t *pl2e;
+ int which;
+ u32 l2_idx;
/* Can't use linear_l2_table with external tables. */
- BUG_ON(shadow_mode_external(ed->domain));
+ BUG_ON(shadow_mode_external(d));
/*
* Attempt to read the PTE that maps the VA being accessed. By checking for
/* We are looking only for read-only mappings of p.t. pages. */
if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) ||
((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
- (page_get_owner(page) != ed->domain) )
+ (page_get_owner(page) != d) )
{
return 0;
}
#endif
/* Writable pagetables are not yet SMP safe. Use emulator for now. */
- if ( (ed->eid != 0) || (ed->ed_next_list != NULL) )
+ if ( d->exec_domain[0]->ed_next_list != NULL )
goto emulate;
/* Get the L2 index at which this L1 p.t. is always mapped. */
* The ptwr_flush call below will restore the PRESENT bit.
*/
if ( likely(l2e & _PAGE_PRESENT) ||
- (ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va &&
- (l2_idx == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx)) )
+ (d->arch.ptwr[PTWR_PT_ACTIVE].l1va &&
+ (l2_idx == d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx)) )
which = PTWR_PT_ACTIVE;
}
* We only allow one ACTIVE and one INACTIVE p.t. to be updated at at
* time. If there is already one, we must flush it out.
*/
- if ( ptwr_info[cpu].ptinfo[which].l1va )
- ptwr_flush(which);
+ if ( d->arch.ptwr[which].l1va )
+ ptwr_flush(d, which);
/*
* If last batch made no updates then we are probably stuck. Emulate this
* update to ensure we make progress.
*/
- if ( (ptwr_info[cpu].ptinfo[which].prev_exec_domain == ed) &&
- (ptwr_info[cpu].ptinfo[which].prev_nr_updates == 0) )
- {
- /* Force non-emul next time, or we can get stuck emulating forever. */
- ptwr_info[cpu].ptinfo[which].prev_exec_domain = NULL;
+ if ( d->arch.ptwr[which].prev_nr_updates == 0 )
goto emulate;
- }
- ptwr_info[cpu].ptinfo[which].l1va = addr | 1;
- ptwr_info[cpu].ptinfo[which].l2_idx = l2_idx;
+ d->arch.ptwr[which].l1va = addr | 1;
+ d->arch.ptwr[which].l2_idx = l2_idx;
/* For safety, disconnect the L1 p.t. page from current space. */
if ( (which == PTWR_PT_ACTIVE) &&
- likely(!shadow_mode_enabled(ed->domain)) )
+ likely(!shadow_mode_enabled(d)) )
{
*pl2e = mk_l2_pgentry(l2e & ~_PAGE_PRESENT);
local_flush_tlb(); /* XXX Multi-CPU guests? */
}
/* Temporarily map the L1 page, and make a copy of it. */
- ptwr_info[cpu].ptinfo[which].pl1e = map_domain_mem(pfn << PAGE_SHIFT);
- memcpy(ptwr_info[cpu].ptinfo[which].page,
- ptwr_info[cpu].ptinfo[which].pl1e,
+ d->arch.ptwr[which].pl1e = map_domain_mem(pfn << PAGE_SHIFT);
+ memcpy(d->arch.ptwr[which].page,
+ d->arch.ptwr[which].pl1e,
L1_PAGETABLE_ENTRIES * sizeof(l1_pgentry_t));
/* Finally, make the p.t. page writable by the guest OS. */
MEM_LOG("ptwr: Could not update pte at %p\n", (unsigned long *)
&linear_pg_table[addr>>PAGE_SHIFT]);
/* Toss the writable pagetable state and crash. */
- unmap_domain_mem(ptwr_info[cpu].ptinfo[which].pl1e);
- ptwr_info[cpu].ptinfo[which].l1va = 0;
+ unmap_domain_mem(d->arch.ptwr[which].pl1e);
+ d->arch.ptwr[which].l1va = 0;
domain_crash();
return 0;
}
return EXCRET_fault_fixed;
}
-static __init int ptwr_init(void)
+/*
+ * Allocate the per-domain writable-pagetable snapshot pages (one for the
+ * ACTIVE slot, one for the INACTIVE slot).  Previously this was a per-CPU
+ * __initcall; it is now called per-domain at creation time and may fail.
+ * Returns 0 on success or -ENOMEM, freeing any partial allocation.
+ */
+int ptwr_init(struct domain *d)
{
- int i;
+ void *x = (void *)alloc_xenheap_page();
+ void *y = (void *)alloc_xenheap_page();
- for ( i = 0; i < smp_num_cpus; i++ )
+ if ( (x == NULL) || (y == NULL) )
{
- ptwr_info[i].ptinfo[PTWR_PT_ACTIVE].page =
- (void *)alloc_xenheap_page();
- ptwr_info[i].ptinfo[PTWR_PT_INACTIVE].page =
- (void *)alloc_xenheap_page();
+ if ( x != NULL )
+ free_xenheap_page((unsigned long)x);
+ if ( y != NULL )
+ free_xenheap_page((unsigned long)y);
+ return -ENOMEM;
}
+ d->arch.ptwr[PTWR_PT_ACTIVE].page = x;
+ d->arch.ptwr[PTWR_PT_INACTIVE].page = y;
+
return 0;
}
-__initcall(ptwr_init);
+/*
+ * Tear down per-domain ptwr state: flush any still-pending writable
+ * pagetable updates, then free the two snapshot pages allocated by
+ * ptwr_init().  Called from domain_relinquish_resources().
+ */
+void ptwr_destroy(struct domain *d)
+{
+ cleanup_writable_pagetable(d);
+ free_xenheap_page((unsigned long)d->arch.ptwr[PTWR_PT_ACTIVE].page);
+ free_xenheap_page((unsigned long)d->arch.ptwr[PTWR_PT_INACTIVE].page);
+}
unsigned long off, addr, fixup;
struct exec_domain *ed = current;
struct domain *d = ed->domain;
- extern int map_ldt_shadow_page(unsigned int);
- int cpu = ed->processor;
int ret;
__asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
{
LOCK_BIGLOCK(d);
- if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
+ if ( unlikely(d->arch.ptwr[PTWR_PT_ACTIVE].l1va) &&
unlikely((addr >> L2_PAGETABLE_SHIFT) ==
- ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
+ d->arch.ptwr[PTWR_PT_ACTIVE].l2_idx) )
{
- ptwr_flush(PTWR_PT_ACTIVE);
+ ptwr_flush(d, PTWR_PT_ACTIVE);
UNLOCK_BIGLOCK(d);
return EXCRET_fault_fixed;
}
if ( (addr < PAGE_OFFSET) &&
((regs->error_code & 3) == 3) && /* write-protection fault */
- ptwr_do_page_fault(addr) )
+ ptwr_do_page_fault(d, addr) )
{
if ( unlikely(shadow_mode_enabled(d)) )
(void)shadow_fault(addr, regs);
* Copy a mapping from the guest's LDT, if it is valid. Otherwise we
* send the fault up to the guest OS to be handled.
*/
+ extern int map_ldt_shadow_page(unsigned int);
LOCK_BIGLOCK(d);
off = addr - LDT_VIRT_START(ed);
addr = ed->arch.ldt_base + off;
{
for_each_exec_domain(d, ed)
sched_rem_domain(ed);
- domain_relinquish_memory(d);
+ domain_relinquish_resources(d);
physdev_destroy_state(d);
put_domain(d);
}
{
if ( unlikely((page = alloc_domheap_page(d)) == NULL) )
{
- domain_relinquish_memory(d);
+ domain_relinquish_resources(d);
return list_empty(&page_scrub_list) ? -ENOMEM : -EAGAIN;
}
if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
{
- /* NB. May recursively lock from domain_relinquish_memory(). */
+ /* NB. May recursively lock from relinquish_memory(). */
spin_lock_recursive(&d->page_alloc_lock);
for ( i = 0; i < (1 << order); i++ )
}
else if ( likely(d != NULL) )
{
- /* NB. May recursively lock from domain_relinquish_memory(). */
+ /* NB. May recursively lock from relinquish_memory(). */
spin_lock_recursive(&d->page_alloc_lock);
for ( i = 0; i < (1 << order); i++ )
perfc_incrc(sched_ctx);
- if ( !is_idle_task(prev->domain) )
- {
- LOCK_BIGLOCK(prev->domain);
- cleanup_writable_pagetable(prev->domain);
- UNLOCK_BIGLOCK(prev->domain);
- }
-
#if defined(WAKE_HISTO)
if ( !is_idle_task(next) && next->wokenup ) {
ulong diff = (ulong)(now - next->wokenup);
extern int arch_final_setup_guestos(
struct exec_domain *, full_execution_context_t *);
-extern void domain_relinquish_memory(struct domain *);
+extern void domain_relinquish_resources(struct domain *);
struct arch_domain {
struct mm_struct *active_mm;
#ifndef __ASM_DOMAIN_H__
#define __ASM_DOMAIN_H__
+#include <xen/mm.h>
+
struct trap_bounce {
unsigned long error_code;
unsigned long cr2;
l3_pgentry_t *mm_perdomain_l3;
#endif
+ /* Writable pagetables. */
+ struct ptwr_info ptwr[2];
+
/* I/O-port access bitmap mask. */
u8 *iobmp_mask; /* Address of IO bitmap mask, or NULL. */
#endif
/* Writable Pagetables */
-typedef struct {
+/*
+ * Per-domain state for one in-flight writable p.t. page (the domain keeps
+ * one ACTIVE and one INACTIVE slot; see d->arch.ptwr[2]).  Replaces the
+ * old per-CPU ptwr_info_t/ptwr_ptinfo_t pair; prev_exec_domain is gone
+ * because the state now belongs to exactly one domain.
+ */
+struct ptwr_info {
/* Linear address where the guest is updating the p.t. page. */
unsigned long l1va;
/* Copy of the p.t. page, taken before guest is given write access. */
/* Index in L2 page table where this L1 p.t. is always hooked. */
unsigned int l2_idx; /* NB. Only used for PTWR_PT_ACTIVE. */
/* Info about last ptwr update batch. */
- struct exec_domain *prev_exec_domain; /* domain making the update */
- unsigned int prev_nr_updates; /* size of update batch */
-} ptwr_ptinfo_t;
-
-typedef struct {
- ptwr_ptinfo_t ptinfo[2];
-} __cacheline_aligned ptwr_info_t;
-
-extern ptwr_info_t ptwr_info[];
+ unsigned int prev_nr_updates;
+};
#define PTWR_PT_ACTIVE 0
#define PTWR_PT_INACTIVE 1
#define PTWR_CLEANUP_ACTIVE 1
#define PTWR_CLEANUP_INACTIVE 2
-void ptwr_flush(const int);
-int ptwr_do_page_fault(unsigned long);
-
-int new_guest_cr3(unsigned long pfn);
+int ptwr_init(struct domain *);
+void ptwr_destroy(struct domain *);
+void ptwr_flush(struct domain *, const int);
+int ptwr_do_page_fault(struct domain *, unsigned long);
-#define __cleanup_writable_pagetable(_what) \
-do { \
- int cpu = smp_processor_id(); \
- if ((_what) & PTWR_CLEANUP_ACTIVE) \
- if (ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) \
- ptwr_flush(PTWR_PT_ACTIVE); \
- if ((_what) & PTWR_CLEANUP_INACTIVE) \
- if (ptwr_info[cpu].ptinfo[PTWR_PT_INACTIVE].l1va) \
- ptwr_flush(PTWR_PT_INACTIVE); \
-} while ( 0 )
-
-#define cleanup_writable_pagetable(_d) \
- do { \
- if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) \
- __cleanup_writable_pagetable(PTWR_CLEANUP_ACTIVE | \
- PTWR_CLEANUP_INACTIVE); \
+/*
+ * Flush (and re-protect) any writable p.t. pages the domain still has
+ * unhooked — but only if the guest enabled the writable-pagetable VM
+ * assist.  Operates on the per-domain ptwr slots instead of the old
+ * per-CPU ptwr_info[], so it no longer needs smp_processor_id().
+ */
+#define cleanup_writable_pagetable(_d) \
+ do { \
+ if ( unlikely(VM_ASSIST((_d), VMASST_TYPE_writable_pagetables)) ) { \
+ if ( (_d)->arch.ptwr[PTWR_PT_ACTIVE].l1va ) \
+ ptwr_flush((_d), PTWR_PT_ACTIVE); \
+ if ( (_d)->arch.ptwr[PTWR_PT_INACTIVE].l1va ) \
+ ptwr_flush((_d), PTWR_PT_INACTIVE); \
+ } \
} while ( 0 )
#ifndef NDEBUG
#define audit_domains() ((void)0)
#endif
+int new_guest_cr3(unsigned long pfn);
+
void propagate_page_fault(unsigned long addr, u16 error_code);
/*
extern void free_perdomain_pt(struct domain *d);
-extern void domain_relinquish_memory(struct domain *d);
+extern void domain_relinquish_resources(struct domain *d);
extern void dump_pageframe_info(struct domain *d);